*===============================================================================
*
*					WORKER BELIEFS ABOUT OUTSIDE OPTIONS
*		(c)	Simon Jaeger, Christopher Roth, Nina Roussille, Benjamin Schoefer
*							  2023 December 5
*						   	   SOEP-IAB Data 
*
*===============================================================================


********************************************************************************
*								Figure B04	 								   *
********************************************************************************

* Close any log left open by an earlier run (capture: no error if none is open),
* then open this script's log.
cap log close
* the path is quoted, like the data paths further down, so that a ${log}
* directory containing spaces does not break the command
log using "${log}/FigureB04ab.log", replace
* fixed seed for the runiform() draw used below as a tie-breaker between spells
set seed 6000


*** load the IAB spell data, restricted to the variables used in this script
use prs_id betnr ieb_beg_epi ieb_end_epi ieb_tag_entg ieb_quellverf_id berufstellg_imp ieb_dba_id geb_dat ieb_beruf_kons_num using "$orig/coworker_fullhist_IEB.dta", clear

* calendar year in which the spell begins
generate jahr = year(ieb_beg_epi)

* education groups (for matching similar-education coworkers):
* codes 1 and 2 keep their own group, codes 11 and 12 are pooled into group 3,
* and every other value (including missing) ends up in group 4
rename ieb_dba_id ausbildung
generate educ = 4
replace educ = 1 if ausbildung == 1
replace educ = 2 if ausbildung == 2
replace educ = 3 if inlist(ausbildung, 11, 12)

* 1-digit occupation (for same-occupation coworkers): occupation code divided by 10,000, rounded down
generate occ1 = floor(ieb_beruf_kons_num/10000)

* age in the year the spell begins (for similar-age coworkers)
generate yob = year(geb_dat)
generate age = jahr - yob

* age bands: 1 = under 30, 2 to 5 = the ten-year bands 30-39, 40-49, 50-59, 60-69,
* 6 = 70 and older, 7 = age missing
generate agecat = 7 if age == .
replace agecat = 1 if age < 30
forvalues band = 2/5 {
	replace agecat = `band' if inrange(age, 10*(`band'+1), 10*(`band'+1)+9)
}
replace agecat = 6 if age > 69 & age != .



*** flag SOEP individuals (for greater sample size, anyone from the full SOEP history counts)
* getlink is not defined in this script; presumably it merges the SOEP link file on prs_id
* and leaves the usual _merge variable behind -- confirm against its definition
getlink, mergevar(prs_id)
generate gsoep_broad = (_merge == 3)
capture drop _merge



*** building the spell file used to identify EUE movers

* retain employment spells (non-missing betnr) and UI spells (ieb_quellverf_id==2)
* that carry a positive, non-missing daily amount
keep if betnr != . | (ieb_quellverf_id == 2 & ieb_tag_entg > 0 & ieb_tag_entg != .)

* The steps below follow the same procedure as 2_coworker_wagechanges; they are repeated
* here because they now run on the full IAB dataset, not just 2015-2019.

* part 1: the UI spells, written out unchanged
preserve
	keep if ieb_quellverf_id == 2
	save "$data/FigureB04ab_tempfile1.dta", replace
restore

* part 2: one employment spell per person and year, namely the one with the highest
* total earnings (spell length in days times daily wage); exact ties are broken by a
* random draw. No preserve is needed here because the data in memory are replaced by
* the use command that follows.
keep if betnr != .
generate spell_earnings = (ieb_end_epi - ieb_beg_epi + 1) * ieb_tag_entg
keep if spell_earnings >= 0 & spell_earnings < .
generate sort_spell_earnings = -spell_earnings
generate random_sort_var = runiform()
bysort prs_id jahr (sort_spell_earnings random_sort_var): generate priority_spell = _n
keep if priority_spell == 1
drop random_sort_var
save "$data/FigureB04ab_tempfile2.dta", replace

* stacking both parts, UI spells first
use "$data/FigureB04ab_tempfile1.dta", clear
append using "$data/FigureB04ab_tempfile2.dta"
	
	
	
*** identifying movers experiencing an intermediate unemployment spell
* After this section only employment spells remain in memory; keepdummy marks those
* employment spells that are directly followed by an unemployment spell of the same person.
* The row comparisons below ([_n-1], [_n+1]) rely on the sort order set here.
	
* restricting to people who have an unemployment spell somewhere in the dataset
* (dummy equals 1 on unemployment spells, i.e. rows with ieb_quellverf_id==2)
sort prs_id ieb_beg_epi
gen dummy = ieb_quellverf_id==2
egen maxdummy = max(dummy), by(prs_id)
keep if maxdummy==1
	
* dropping an unemployment spell if it begins 5 or more days before the preceding spell
* of the same person ends
* NOTE(review): this comment used to say "overlaps by more than 4 weeks", but the threshold
* in the code is -5 and the spell dates appear to be daily dates (year() is applied to
* ieb_beg_epi earlier in this script) -- confirm which cutoff is intended
drop if dummy==1 & prs_id==prs_id[_n-1] & ieb_beg_epi-ieb_end_epi[_n-1]<=-5
* this is necessary because some unemployment spells overlap with the prior employment spell
	
* generating a flag for employment spells whose next row (same person) is an unemployment
* spell that starts more than -5 and fewer than 84 units (presumably days) after this spell ends
gen keepdummy = 0
replace keepdummy = 1 if prs_id==prs_id[_n+1] & betnr!=. & ieb_quellverf_id[_n+1]==2 & ///
	(ieb_beg_epi[_n+1]-ieb_end_epi<84) & (ieb_beg_epi[_n+1]-ieb_end_epi>-5)
* note we can't restrict to keepdummy==1 yet, since we still need to calculate wages at next employer!
* so we need the "employed at next employer" observations to remain in the dataset for now
	
* _now_ we can finally restrict to just employment spells (the unemployment rows were only
* needed to set keepdummy)
keep if betnr!=.
	


*** other variable cleaning

* full-time indicator: negative codes of berufstellg_imp are first set to missing;
* codes 8, 9 and 21 are treated as not full-time, every other non-missing code as
* full-time, and a missing code leaves the indicator missing
replace berufstellg_imp = . if berufstellg_imp < 0
generate fulltime = !inlist(berufstellg_imp, 8, 9, 21) if berufstellg_imp != .

* log daily wage, then winsorized with p(0.02) into ln_wage
* (capture noisily: an error raised by winsor is shown in the log but does not stop the script)
generate ln_wage_non_winsorized = ln(ieb_tag_entg)
capture noisily winsor ln_wage_non_winsorized, p(0.02) generate(ln_wage)

*** calculating mover wage changes
* change in (winsorized) log wage between this spell and the person's next spell; only
* defined when the next spell is at a different establishment (betnr) and lies in the
* following calendar year
bys prs_id (jahr): gen delta_ln_wage_mover = ln_wage[_n+1]-ln_wage if (betnr[_n+1]!=betnr) & (jahr[_n+1]==jahr+1)

*** restricting to FT-to-FT movers
bys prs_id (jahr): gen fulltimenext = fulltime[_n+1] if (betnr[_n+1]!=betnr) & (jahr[_n+1]==jahr+1) // fulltime status at next job
* wage changes are kept only when both the current and the next job are full-time
replace delta_ln_wage_mover = . if !(fulltime==1 & fulltimenext==1)

*** occupation at next job
* FIX: this line previously referenced a local macro named occvar that is never defined in
* this script; an undefined local expands to nothing, which left the invalid expression
* "[_n+1]" on the right-hand side. occ1 (the 1-digit occupation built when the data are
* loaded) is the only occupation variable constructed in this script.
* NOTE(review): confirm that the 1-digit code, rather than ieb_beruf_kons_num, is intended.
bys prs_id (jahr): gen nextocc = occ1[_n+1]

* snapshot that each pass of the figure loop reloads
save "$data/FigureB04ab_tempfile3.dta", replace

*** looping over whether we consider all moves or just EUE moves
* "unemp" produces Figure B04a (EUE moves only), "" produces Figure B04b (all moves).
* Each pass reloads the saved spell file, builds leave-out coworker wage-change statistics
* by establishment over rolling 5-year windows, applies the empirical Bayes step, regresses
* SOEP movers' wage changes on the coworker measure, and exports a binscatter.
foreach sample in "unemp" "" {

	* NOTE(review): extrasuff is set here but not referenced again in this script
	if "`sample'"=="" local extrasuff "_all" // all moves
	if "`sample'"=="unemp" local extrasuff "_unemp" // just EUE moves

	use "$data/FigureB04ab_tempfile3.dta", clear
	
	if "`sample'"=="unemp" keep if keepdummy==1 // restricting to EUE moves if relevant

	* identifying workplaces containing a SOEP respondent (broadly construed)
	* NOTE(review): maxgsoep is not used further below
	egen maxgsoep = max(gsoep_broad), by(betnr)
	
	*** generating leave-out means of coworker wage changes
	* containers filled year by year inside the loop below
	gen coworker_mean_change = .
	gen sd_coworker_mean_change = .
	gen num_people_perm = .
	
	* rolling 5-year windows: for rows whose jahr equals the loop year, store statistics of
	* coworker wage changes over the window [year-4, year] (the move year itself is included)
	forvalues year = 1988/2019 {
	
		* removing any work variables that are still around (dummy is left over from the
		* EUE section; the others only exist if an earlier run stopped mid-iteration).
		* FIX: this used to be a bare "cap drop" without a variable list, which always
		* fails silently and removes nothing. Variables are dropped one at a time because
		* a single drop with a list fails as a whole when any listed variable is absent.
		foreach v in insample dummy inctemp sum_delta num_people leave_out tempdiff tempdiffsum sd_leave_out {
			cap drop `v'
		}
		
		* defining the year window
		gen insample = inrange(jahr,`year'-4,`year') 
		* dummy marks rows with a non-missing mover wage change
		gen dummy = delta_ln_wage_mover!=.
		
		* NOTE(review): inctemp is created and dropped without being used
		gen inctemp = ieb_tag_entg if insample==1 
					
		*** leave-out mean of coworker wage changes
		
		* totals within establishment x window status; only the in-window rows with
		* jahr equal to the loop year are copied into the permanent variables below
		egen sum_delta = sum(delta_ln_wage_mover), by(betnr insample)
		egen num_people = sum(dummy), by(betnr insample)
		
		* own wage change removed from the total; missing when the row has no wage change
		* of its own or when it is the only mover in its cell (division by zero)
		gen leave_out = (sum_delta-delta_ln_wage_mover)/(num_people-1)
		
		replace coworker_mean_change = leave_out if jahr==`year'
			
		*** SD of leave-out coworker wage changes (used later for the empirical bayes correction of mean coworker wage changes)
		
		* squared deviation of each row's own wage change from that row's leave-out mean
		gen tempdiff = (delta_ln_wage_mover-leave_out)^2
		
		egen tempdiffsum = sum(tempdiff), by(betnr insample)
		
		* own squared deviation removed before taking the root
		gen sd_leave_out = sqrt((tempdiffsum-tempdiff)/(num_people-1))
		
		replace sd_coworker_mean_change = sd_leave_out if jahr==`year'
		
		replace num_people_perm = num_people if jahr==`year'
		
		drop sum_delta num_people leave_out sd_leave_out tempdiff tempdiffsum insample dummy inctemp 

	}
		
	
	* empirical bayes correction of mean coworker wage changes
	* (ebayes is not defined in this script; presumably it shrinks the estimate using the
	* supplied standard error and stores the result in the gen() variable -- confirm)
	gen se = sd_coworker_mean_change/sqrt(num_people_perm-1)
	ebayes coworker_mean_change se, gen(ebayes_mean_change)
	drop se
		
	keep if gsoep_broad==1 // keeping SOEP individuals	
	keep if ebayes_mean_change!=. // keeping individuals with nonmissing coworker wage changes

	* winsorizing both regression variables with p(0.02), keeping the original names
	foreach var in delta_ln_wage_mover ebayes_mean_change {
		rename `var' _`var'
		winsor _`var', p(0.02) gen(`var')
		drop _`var'
	}
		
	egen maxyear = max(jahr), by(prs_id) // keeping each individual's latest observed move
	keep if maxyear==jahr
	drop maxyear
	
	reg delta_ln_wage_mover ebayes_mean_change, robust // running the prediction regression
		* slope and its standard error, rounded to three decimals for the plot legend
		local coef = string(round(_b[ebayes_mean_change],0.001),"%4.3f") 
		local se = string(round(_se[ebayes_mean_change],0.001),"%4.3f")
		local numobs = e(N)
	
	* determining plot axis ranges
	* NOTE(review): xloc and yloc (and numobs above) are not used by the plot command below
	pctile xpctile = ebayes_mean_change, nq(20)
	su xpctile
	local xloc = r(min)
	
	egen mean_by_ventile = mean(delta_ln_wage_mover), by(xpctile)
	su mean_by_ventile
	local yloc = r(min)
	
	drop xpctile mean_by_ventile
	
	* creating binscatter plot, with the regression slope and SE shown in the legend
	binscatter delta_ln_wage_mover ebayes_mean_change, ytitle("SOEP Mover Wage Changes") ///
		xtitle("Mean Coworker Wage Changes") ylabel(#10,grid) xlabel(#10,grid) ///
		legend(on order(- "Slope: `coef' (SE `se')") region(lwidth(none)) pos(11) ring(0))
	
	* export paths are quoted so that a ${log} directory containing spaces still works
	if "`sample'"=="unemp" {
		graph export "${log}/FigureB04a.pdf", replace
	}
	if "`sample'"=="" {
		graph export "${log}/FigureB04b.pdf", replace
	}

}





* close the log (no error if none is open) and empty the data in memory
capture log close
clear
